Data summary

# Column-wise overview of the match table: names, types and first values
glimpse(Laliga_2021)
## Rows: 373
## Columns: 18
## $ league        <chr> "La_liga", "La_liga", "La_liga", "La_liga", "La_liga", "…
## $ season        <chr> "2021/2022", "2021/2022", "2021/2022", "2021/2022", "202…
## $ match_id      <chr> "17136", "17138", "17139", "17137", "17140", "17141", "1…
## $ isResult      <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
## $ home_id       <chr> "146", "239", "261", "157", "158", "152", "148", "138", …
## $ home_team     <chr> "Valencia", "Mallorca", "Cadiz", "Osasuna", "Alaves", "C…
## $ home_abbr     <chr> "VAL", "MAL", "CAD", "OSA", "ALA", "CEL", "BAR", "SEV", …
## $ away_id       <chr> "142", "153", "151", "141", "150", "143", "140", "145", …
## $ away_team     <chr> "Getafe", "Real Betis", "Levante", "Espanyol", "Real Mad…
## $ away_abbr     <chr> "GET", "BET", "LEV", "ESP", "RMA", "ATL", "SOC", "RVL", …
## $ home_goals    <dbl> 1, 1, 1, 0, 1, 1, 4, 3, 0, 0, 1, 0, 1, 0, 1, 1, 1, 3, 0,…
## $ away_goals    <dbl> 0, 1, 1, 0, 4, 2, 2, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 3, 1,…
## $ home_xG       <dbl> 1.578610, 0.569578, 0.993589, 0.579404, 1.410970, 1.5939…
## $ away_xG       <dbl> 1.193260, 0.814085, 0.915954, 0.583698, 2.155510, 1.8050…
## $ datetime      <chr> "2021-08-13 19:00:00", "2021-08-14 17:30:00", "2021-08-1…
## $ forecast_win  <dbl> 0.4959, 0.2253, 0.3656, 0.2966, 0.1984, 0.2949, 0.9194, …
## $ forecast_draw <dbl> 0.2600, 0.3826, 0.3362, 0.4217, 0.2402, 0.2955, 0.0596, …
## $ forecast_loss <dbl> 0.2441, 0.3921, 0.2982, 0.2817, 0.5614, 0.4096, 0.0210, …
# Pass the first rows (head() defaults to six) to view() for a quick look
view(head(Laliga_2021))

# wide → long: one row per team per match instead of one row per match.
# `home_away` records which column the team came from ("home_team" or
# "away_team") and `team` holds the team name.
Laliga_2021 <- Laliga_2021 |>
  pivot_longer(
    cols = c(home_team, away_team),
    names_to = "home_away",
    values_to = "team"
  ) |>
  mutate(
    # Match outcome, encoded with the same labels as `home_away` so the two
    # columns can be compared directly. Stays NA when the goals are NA.
    win = case_when(
      home_goals > away_goals ~ "home_team",
      home_goals == away_goals ~ "draw",
      home_goals < away_goals ~ "away_team"
    ),
    # 3 points to the winning side, 1 each for a draw, 0 to the losing side.
    point = case_when(
      home_away == win ~ 3,
      win == "draw" ~ 1,
      !is.na(win) ~ 0
    )
  )

Ranking

# League table: total points per team, highest total first
Ranking <- arrange(
  summarise(group_by(Laliga_2021, team), points = sum(point)),
  desc(points)
)
DT::datatable(Ranking)

# Names of the ten teams at the top of the table
top_10 <- head(Ranking$team, 10)

# Points progression: cumulative points over time for the top-10 teams
g <- Laliga_2021 |>
  filter(team %in% top_10) |>
  select(team, datetime, point) |>
  # `datetime` is stored as character; parse it so the x axis is a continuous
  # time scale rather than one discrete level per kickoff time.
  # NOTE(review): the source time zone is not visible here; UTC is used only
  # to make parsing deterministic - confirm if absolute times matter.
  mutate(datetime = as.POSIXct(datetime, tz = "UTC")) |>
  # cumsum() depends on row order, so sort chronologically before accumulating
  arrange(datetime) |>
  group_by(team) |>
  mutate(points = cumsum(point)) |>
  ungroup() |>
  # group = team draws one line per team; group = 1 would join every team's
  # points into a single path
  ggplot(aes(datetime, points, group = team, color = team)) +
  geom_line()
plotly::ggplotly(g)

Home/Away

# xG vs. xGA: per-team mean of expected goals for (xG) and against (xGA)
Laliga_2021 |>
  # `home_away` marks which side `team` played on, so the team's own xG and
  # the opponent's xG (= xGA) are picked from the home/away columns accordingly
  mutate(xG = if_else(home_away == "home_team", home_xG, away_xG),
         xGA = if_else(home_away == "home_team", away_xG, home_xG)) |>
  group_by(team) |>
  summarise(xG = mean(xG),
            xGA = mean(xGA)) |>
  ggplot(aes(xG, xGA, color = team, label = team)) +
  geom_point() +
  # max.overlaps = Inf labels every team; with the default limit ggrepel
  # left some points unlabeled
  ggrepel::geom_label_repel(max.overlaps = Inf) +
  xlim(c(0, 2.5)) +
  ylim(c(0, 2.5))

# Mean points per match vs. mean xG per team
Laliga_2021 |>
  # `home_away` marks which side `team` played on; pick that side's xG
  mutate(xG = if_else(home_away == "home_team", home_xG, away_xG)) |>
  group_by(team) |>
  summarise(mean_point = mean(point),
            xG = mean(xG)) |>
  ggplot(aes(xG, mean_point, color = team, label = team)) +
  geom_point() +
  # Reference line y = x (intercept defaults to 0)
  geom_abline(slope = 1) +
  # max.overlaps = Inf labels every team; with the default limit ggrepel
  # left some points unlabeled
  ggrepel::geom_label_repel(max.overlaps = Inf) +
  xlim(c(0, 2.5)) +
  ylim(c(0, 2.5))